Dr. Chan, Chun-Hsiang
Department of Geography, National Taiwan Normal University
# import packages
import requests
from bs4 import BeautifulSoup
# Form fields posted to the /ask/over18 endpoint.
payload = {
    'from': '/bbs/Gossiping/index.html',
    'yes': 'yes',
}
# Use one session for every request so the cookie set by the POST below
# is sent along with the following GET requests.
rs = requests.session()
# Seconds to wait for the server. Without an explicit timeout, requests
# may block forever when the server stops responding.
timeout_seconds = 10
# Submit the form data.
response = rs.post(
    "https://www.ptt.cc/ask/over18",
    data=payload,
    timeout=timeout_seconds,
)
# Request the Gossiping index page with the same session.
response = rs.get(
    "https://www.ptt.cc/bbs/Gossiping/index.html",
    timeout=timeout_seconds,
)
print(response.status_code)
200
# Parse the index page HTML with the built-in "html.parser" backend.
root = BeautifulSoup(response.text, "html.parser")
# Collect every <div> whose class is "title" (one per listed post).
links = root.find_all("div", class_="title")
# Print the relative URL of each post.
for link in links:
    # link.a is None when the div has no <a> child (presumably how the
    # board lists deleted posts - verify against a live page); indexing
    # None would raise TypeError, so such entries are skipped.
    if link.a is None:
        continue
    print(link.a["href"])
/bbs/Gossiping/M.1713346551.A.BD2.html /bbs/Gossiping/M.1713346618.A.16C.html /bbs/Gossiping/M.1713346619.A.094.html /bbs/Gossiping/M.1713346680.A.9A7.html /bbs/Gossiping/M.1713346681.A.B09.html /bbs/Gossiping/M.1713346707.A.7EC.html /bbs/Gossiping/M.1713346739.A.1F7.html /bbs/Gossiping/M.1713346740.A.CB9.html /bbs/Gossiping/M.1713346833.A.365.html /bbs/Gossiping/M.1713346893.A.C4E.html /bbs/Gossiping/M.1713346969.A.B9B.html /bbs/Gossiping/M.1713346974.A.B67.html /bbs/Gossiping/M.1713346983.A.BA7.html /bbs/Gossiping/M.1713347137.A.A8A.html /bbs/Gossiping/M.1713347211.A.AB5.html /bbs/Gossiping/M.1713347246.A.4EA.html /bbs/Gossiping/M.1713347320.A.8C7.html /bbs/Gossiping/M.1713347326.A.952.html /bbs/Gossiping/M.1713347404.A.3A2.html /bbs/Gossiping/M.1713347432.A.A32.html /bbs/Gossiping/M.1699632792.A.2CB.html /bbs/Gossiping/M.1712286517.A.F21.html /bbs/Gossiping/M.1712412585.A.D57.html /bbs/Gossiping/M.1712662710.A.6A7.html
# Turn each relative post link into an absolute URL and print it.
# After the loop, page_url holds the last URL that was built.
for link in links:
    # Skip title divs without an <a> child: link.a would be None there
    # and link.a["href"] would raise TypeError.
    if link.a is None:
        continue
    page_url = "https://www.ptt.cc" + link.a["href"]
    print(page_url)
https://www.ptt.cc/bbs/Gossiping/M.1713346551.A.BD2.html https://www.ptt.cc/bbs/Gossiping/M.1713346618.A.16C.html https://www.ptt.cc/bbs/Gossiping/M.1713346619.A.094.html https://www.ptt.cc/bbs/Gossiping/M.1713346680.A.9A7.html https://www.ptt.cc/bbs/Gossiping/M.1713346681.A.B09.html https://www.ptt.cc/bbs/Gossiping/M.1713346707.A.7EC.html https://www.ptt.cc/bbs/Gossiping/M.1713346739.A.1F7.html https://www.ptt.cc/bbs/Gossiping/M.1713346740.A.CB9.html https://www.ptt.cc/bbs/Gossiping/M.1713346833.A.365.html https://www.ptt.cc/bbs/Gossiping/M.1713346893.A.C4E.html https://www.ptt.cc/bbs/Gossiping/M.1713346969.A.B9B.html https://www.ptt.cc/bbs/Gossiping/M.1713346974.A.B67.html https://www.ptt.cc/bbs/Gossiping/M.1713346983.A.BA7.html https://www.ptt.cc/bbs/Gossiping/M.1713347137.A.A8A.html https://www.ptt.cc/bbs/Gossiping/M.1713347211.A.AB5.html https://www.ptt.cc/bbs/Gossiping/M.1713347246.A.4EA.html https://www.ptt.cc/bbs/Gossiping/M.1713347320.A.8C7.html https://www.ptt.cc/bbs/Gossiping/M.1713347326.A.952.html https://www.ptt.cc/bbs/Gossiping/M.1713347404.A.3A2.html https://www.ptt.cc/bbs/Gossiping/M.1713347432.A.A32.html https://www.ptt.cc/bbs/Gossiping/M.1699632792.A.2CB.html https://www.ptt.cc/bbs/Gossiping/M.1712286517.A.F21.html https://www.ptt.cc/bbs/Gossiping/M.1712412585.A.D57.html https://www.ptt.cc/bbs/Gossiping/M.1712662710.A.6A7.html
# Download the page at page_url with the shared session. The timeout
# (in seconds) keeps the call from blocking forever on a stalled server.
response = rs.get(page_url, timeout=10)
# Parse the downloaded HTML with the built-in "html.parser" backend.
result = BeautifulSoup(response.text, "html.parser")
# Display the parsed document (notebook cell output).
result
<!DOCTYPE html> <html> <head> <meta charset="utf-8"/> <meta content="width=device-width, initial-scale=1" name="viewport"/> <title>求助!家人車禍行車紀錄 - 看板 Gossiping - 批踢踢實業坊</title> <meta content="all" name="robots"/> <meta content="Ptt BBS 批踢踢" name="keywords"/> <meta content="媽媽發生車禍後大量硬腦膜下出血 目前在加護病房中昏迷觀察 希望在2024/4/9早上9點18分經過台南市北區臨安路跟海安路交叉路口的朋友能夠提供 相關的影像或行車紀錄器 謝謝各位大恩大德 地點: 台南市北區臨安路跟海安路交叉口。 " name="description"/> <meta content="Ptt 批踢踢實業坊" property="og:site_name"/> <meta content="求助!家人車禍行車紀錄" property="og:title"/> <meta content="媽媽發生車禍後大量硬腦膜下出血 目前在加護病房中昏迷觀察 希望在2024/4/9早上9點18分經過台南市北區臨安路跟海安路交叉路口的朋友能夠提供 相關的影像或行車紀錄器 謝謝各位大恩大德 地點: 台南市北區臨安路跟海安路交叉口。 " property="og:description"/> <link href="https://www.ptt.cc/bbs/Gossiping/M.1712662710.A.6A7.html" rel="canonical"/> <link href="//images.ptt.cc/bbs/v2.27/bbs-common.css" rel="stylesheet" type="text/css"/> <link href="//images.ptt.cc/bbs/v2.27/bbs-base.css" media="screen" rel="stylesheet" type="text/css"/> <link href="//images.ptt.cc/bbs/v2.27/bbs-custom.css" rel="stylesheet" type="text/css"/> <link href="//images.ptt.cc/bbs/v2.27/pushstream.css" media="screen" rel="stylesheet" type="text/css"/> <link href="//images.ptt.cc/bbs/v2.27/bbs-print.css" media="print" rel="stylesheet" type="text/css"/> </head> <body> <div id="topbar-container"> <div class="bbs-content" id="topbar"> <a href="/bbs/" id="logo">批踢踢實業坊</a> <span>›</span> <a class="board" href="/bbs/Gossiping/index.html"><span class="board-label">看板 </span>Gossiping</a> <a class="right small" href="/about.html">關於我們</a> <a class="right small" href="/contact.html">聯絡資訊</a> </div> </div> <div id="navigation-container"> <div class="bbs-content" id="navigation"> <a class="board" href="/bbs/Gossiping/index.html">返回看板</a> <div class="bar"></div> </div> </div> <div id="main-container"> <div class="bbs-screen bbs-content" id="main-content"><div class="article-metaline"><span class="article-meta-tag">作者</span><span class="article-meta-value">derrick1680 ()</span></div><div 
class="article-metaline-right"><span class="article-meta-tag">看板</span><span class="article-meta-value">Gossiping</span></div><div class="article-metaline"><span class="article-meta-tag">標題</span><span class="article-meta-value">求助!家人車禍行車紀錄</span></div><div class="article-metaline"><span class="article-meta-tag">時間</span><span class="article-meta-value">Tue Apr 9 19:38:23 2024</span></div> 媽媽發生車禍後大量硬腦膜下出血 目前在加護病房中昏迷觀察 希望在2024/4/9早上9點18分經過台南市北區臨安路跟海安路交叉路口的朋友能夠提供 相關的影像或行車紀錄器 謝謝各位大恩大德 地點: 台南市北區臨安路跟海安路交叉口。 時間:2024/4/9 早上 9:16~9:18 若有人或店家有相關影像 請聯絡: 0961122889 許先生 0987122899 許先生 再麻煩大家幫忙 謝謝各位! -- <span class="f2">※ 發信站: 批踢踢實業坊(ptt.cc), 來自: 219.71.103.32 (臺灣) </span><span class="f2">※ 文章網址: <a href="https://www.ptt.cc/bbs/Gossiping/M.1712662710.A.6A7.html" rel="noreferrer noopener nofollow" target="_blank">https://www.ptt.cc/bbs/Gossiping/M.1712662710.A.6A7.html</a> </span><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl push-userid">ubcs</span><span class="f3 push-content">: 麻煩分類改協尋喔</span><span class="push-ipdatetime"> 59.120.192.119 04/09 19:38 </span></div><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl push-userid">tetani</span><span class="f3 push-content">: 看地圖 在那邊超多政府監視器 都是壞的?</span><span class="push-ipdatetime"> 61.227.188.159 04/09 19:40 </span></div><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl push-userid">home4129</span><span class="f3 push-content">: 推一下</span><span class="push-ipdatetime"> 180.217.232.78 04/09 19:43 </span></div><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl push-userid">waa006</span><span class="f3 push-content">: 高調</span><span class="push-ipdatetime"> 223.138.73.235 04/09 19:49 </span></div><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl push-userid">genaro</span><span class="f3 push-content">: 推</span><span class="push-ipdatetime"> 219.71.35.114 04/09 20:44 </span></div><div class="push"><span class="hl push-tag">推 
</span><span class="f3 hl push-userid">noah23</span><span class="f3 push-content">: 加油 我以前在善化被計程車逼車摔車 警察</span><span class="push-ipdatetime"> 106.68.248.62 04/09 22:08 </span></div><div class="push"><span class="f1 hl push-tag">→ </span><span class="f3 hl push-userid">noah23</span><span class="f3 push-content">: 說找不到監視器 我還帶著傷自己去旁邊店</span><span class="push-ipdatetime"> 106.68.248.62 04/09 22:08 </span></div><div class="push"><span class="f1 hl push-tag">→ </span><span class="f3 hl push-userid">noah23</span><span class="f3 push-content">: 家要到車牌 然後承辦員警還想搓湯圓 恐嚇</span><span class="push-ipdatetime"> 106.68.248.62 04/09 22:08 </span></div><div class="push"><span class="f1 hl push-tag">→ </span><span class="f3 hl push-userid">noah23</span><span class="f3 push-content">: 我摔車當下走到旁邊也算肇逃 台南警察真</span><span class="push-ipdatetime"> 106.68.248.62 04/09 22:08 </span></div><div class="push"><span class="f1 hl push-tag">→ </span><span class="f3 hl push-userid">noah23</span><span class="f3 push-content">: 的很垃圾</span><span class="push-ipdatetime"> 106.68.248.62 04/09 22:08 </span></div><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl push-userid">spring719</span><span class="f3 push-content">: 高調</span><span class="push-ipdatetime"> 111.82.74.81 04/10 03:24 </span></div><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl push-userid">shownlin</span><span class="f3 push-content">: 高調</span><span class="push-ipdatetime"> 27.53.8.127 04/10 11:58 </span></div><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl push-userid">poasdm</span><span class="f3 push-content">: 高調</span><span class="push-ipdatetime">122.118.206.177 04/10 12:03 </span></div><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl push-userid">Isis0311</span><span class="f3 push-content">: 高調</span><span class="push-ipdatetime"> 114.136.76.102 04/10 13:54 </span></div><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl 
push-userid">nonsu</span><span class="f3 push-content">: 幫高調</span><span class="push-ipdatetime"> 111.82.86.9 04/10 15:55 </span></div><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl push-userid">bingreen</span><span class="f3 push-content">: 幫高</span><span class="push-ipdatetime">111.184.234.172 04/10 19:11 </span></div><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl push-userid">a22028504</span><span class="f3 push-content">: 推</span><span class="push-ipdatetime"> 219.84.253.220 04/10 19:28 </span></div><div class="push"><span class="f1 hl push-tag">噓 </span><span class="f3 hl push-userid">windsp0419</span><span class="f3 push-content">: 怎自己不裝行車記錄器</span><span class="push-ipdatetime"> 36.238.156.135 04/10 20:50 </span></div><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl push-userid">TZUYIC</span><span class="f3 push-content">: 電子書螢幕會有手機好看嗎,不怎麼吸引人</span><span class="push-ipdatetime"> 101.137.181.95 04/10 23:53 </span></div><div class="push"><span class="f1 hl push-tag">→ </span><span class="f3 hl push-userid">TZUYIC</span><span class="f3 push-content">: 推錯,請無視。</span><span class="push-ipdatetime"> 101.137.181.95 04/10 23:53 </span></div><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl push-userid">gl4su06</span><span class="f3 push-content">: 高調</span><span class="push-ipdatetime"> 101.9.97.91 04/11 07:01 </span></div><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl push-userid">DOOT</span><span class="f3 push-content">: 順風本來就是這麼虐 反觀某高中生</span><span class="push-ipdatetime">223.141.210.167 04/11 08:19 </span></div><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl push-userid">ruby54332521</span><span class="f3 push-content">: 推</span><span class="push-ipdatetime"> 114.43.208.150 04/11 11:31 </span></div><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl push-userid">jal809</span><span class="f3 
push-content">: 高調</span><span class="push-ipdatetime"> 114.45.14.89 04/11 17:54 </span></div><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl push-userid">whitefox</span><span class="f3 push-content">: 原來是樹本</span><span class="push-ipdatetime">118.150.192.235 04/11 18:27 </span></div><div class="push"><span class="f1 hl push-tag">→ </span><span class="f3 hl push-userid">kaodio</span><span class="f3 push-content">: 所以說偷拍是不是比較不嚴重的戀童啊</span><span class="push-ipdatetime"> 118.168.199.9 04/11 19:33 </span></div><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl push-userid">abb123456</span><span class="f3 push-content">: 高調</span><span class="push-ipdatetime"> 223.136.80.155 04/12 02:16 </span></div><div class="push"><span class="f1 hl push-tag">→ </span><span class="f3 hl push-userid">luciffar</span><span class="f3 push-content">: 2.8*5你不會算???</span><span class="push-ipdatetime"> 111.71.214.19 04/13 17:29 </span></div><div class="push"><span class="f1 hl push-tag">→ </span><span class="f3 hl push-userid">luciffar</span><span class="f3 push-content">: 抱歉文章被刪了 推文跑來這</span><span class="push-ipdatetime"> 111.71.214.19 04/13 17:31 </span></div><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl push-userid">axixi</span><span class="f3 push-content">: 推</span><span class="push-ipdatetime"> 49.216.128.150 04/13 17:53 </span></div><div class="push"><span class="f1 hl push-tag">→ </span><span class="f3 hl push-userid">aiweisen</span><span class="f3 push-content">: 幫高調</span><span class="push-ipdatetime">223.136.218.155 04/15 15:52 </span></div><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl push-userid">jetalpha</span><span class="f3 push-content">: 幫高調</span><span class="push-ipdatetime"> 1.175.88.251 04/15 17:19 </span></div><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl push-userid">cdvfbgnh</span><span class="f3 push-content">: 高調</span><span 
class="push-ipdatetime"> 180.217.251.26 04/16 09:25 </span></div><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl push-userid">ckyuzi</span><span class="f3 push-content">: 漲價跟電夠不夠有什麼關係</span><span class="push-ipdatetime"> 1.164.156.1 04/16 16:26 </span></div><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl push-userid">s629627439</span><span class="f3 push-content">: 海德格救我~~</span><span class="push-ipdatetime"> 49.216.91.6 04/16 18:25 </span></div><div class="push"><span class="f1 hl push-tag">→ </span><span class="f3 hl push-userid">s629627439</span><span class="f3 push-content">: 沒事,我回錯文</span><span class="push-ipdatetime"> 49.216.91.6 04/16 18:25 </span></div><div class="push"><span class="f1 hl push-tag">→ </span><span class="f3 hl push-userid">dw7931425</span><span class="f3 push-content">: 炒青椒要加什麼才專業?</span><span class="push-ipdatetime"> 114.33.133.253 04/16 20:05 </span></div><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl push-userid">ralfeistein</span><span class="f3 push-content">: 所以核電廠跟溫泉偷拍的關係是???</span><span class="push-ipdatetime"> 111.253.99.194 04/16 20:27 </span></div><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl push-userid">hsulunlee</span><span class="f3 push-content">: 有感!!</span><span class="push-ipdatetime"> 58.114.69.4 04/17 03:01 </span></div><div class="push"><span class="f1 hl push-tag">→ </span><span class="f3 hl push-userid">hsulunlee</span><span class="f3 push-content">: 不好意思回錯了T-T</span><span class="push-ipdatetime"> 58.114.69.4 04/17 03:01 </span></div></div> <div data-longpollurl="/v1/longpoll?id=aaff77aa84dc9c6073eb7d32c668383928aa5769" data-offset="4649" data-pollurl="/poll/Gossiping/M.1712662710.A.6A7.html?cacheKey=2431-551135063&offset=4649&offset-sig=e8d4ee33a4e25f2e252e0ea82cbaaae4eaa215c5" id="article-polling"></div> <div class="bbs-screen bbs-footer-message">本網站已依台灣網站內容分級規定處理。此區域為限制級,未滿十八歲者不得瀏覽。</div> </div> <script async="" 
src="https://www.googletagmanager.com/gtag/js?id=G-DZ6Y3BY9GW"></script> <script> window.dataLayer = window.dataLayer || []; function gtag(){dataLayer.push(arguments);} gtag('js', new Date()); gtag('config', 'G-DZ6Y3BY9GW'); </script> <script> (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) })(window,document,'script','https://www.google-analytics.com/analytics.js','ga'); ga('create', 'UA-32365737-1', { cookieDomain: 'ptt.cc', legacyCookieDomain: 'ptt.cc' }); ga('send', 'pageview'); </script> <script src="//ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script> <script src="//images.ptt.cc/bbs/v2.27/bbs.js"></script> </body> </html>
# Locate the <div> whose id attribute is "main-content".
main_content = result.find("div", attrs={"id": "main-content"})
# Display the element (notebook cell output).
main_content
<div class="bbs-screen bbs-content" id="main-content"><div class="article-metaline"><span class="article-meta-tag">作者</span><span class="article-meta-value">derrick1680 ()</span></div><div class="article-metaline-right"><span class="article-meta-tag">看板</span><span class="article-meta-value">Gossiping</span></div><div class="article-metaline"><span class="article-meta-tag">標題</span><span class="article-meta-value">求助!家人車禍行車紀錄</span></div><div class="article-metaline"><span class="article-meta-tag">時間</span><span class="article-meta-value">Tue Apr 9 19:38:23 2024</span></div> 媽媽發生車禍後大量硬腦膜下出血 目前在加護病房中昏迷觀察 希望在2024/4/9早上9點18分經過台南市北區臨安路跟海安路交叉路口的朋友能夠提供 相關的影像或行車紀錄器 謝謝各位大恩大德 地點: 台南市北區臨安路跟海安路交叉口。 時間:2024/4/9 早上 9:16~9:18 若有人或店家有相關影像 請聯絡: 0961122889 許先生 0987122899 許先生 再麻煩大家幫忙 謝謝各位! -- <span class="f2">※ 發信站: 批踢踢實業坊(ptt.cc), 來自: 219.71.103.32 (臺灣) </span><span class="f2">※ 文章網址: <a href="https://www.ptt.cc/bbs/Gossiping/M.1712662710.A.6A7.html" rel="noreferrer noopener nofollow" target="_blank">https://www.ptt.cc/bbs/Gossiping/M.1712662710.A.6A7.html</a> </span><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl push-userid">ubcs</span><span class="f3 push-content">: 麻煩分類改協尋喔</span><span class="push-ipdatetime"> 59.120.192.119 04/09 19:38 </span></div><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl push-userid">tetani</span><span class="f3 push-content">: 看地圖 在那邊超多政府監視器 都是壞的?</span><span class="push-ipdatetime"> 61.227.188.159 04/09 19:40 </span></div><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl push-userid">home4129</span><span class="f3 push-content">: 推一下</span><span class="push-ipdatetime"> 180.217.232.78 04/09 19:43 </span></div><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl push-userid">waa006</span><span class="f3 push-content">: 高調</span><span class="push-ipdatetime"> 223.138.73.235 04/09 19:49 </span></div><div class="push"><span class="hl push-tag">推 </span><span 
class="f3 hl push-userid">genaro</span><span class="f3 push-content">: 推</span><span class="push-ipdatetime"> 219.71.35.114 04/09 20:44 </span></div><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl push-userid">noah23</span><span class="f3 push-content">: 加油 我以前在善化被計程車逼車摔車 警察</span><span class="push-ipdatetime"> 106.68.248.62 04/09 22:08 </span></div><div class="push"><span class="f1 hl push-tag">→ </span><span class="f3 hl push-userid">noah23</span><span class="f3 push-content">: 說找不到監視器 我還帶著傷自己去旁邊店</span><span class="push-ipdatetime"> 106.68.248.62 04/09 22:08 </span></div><div class="push"><span class="f1 hl push-tag">→ </span><span class="f3 hl push-userid">noah23</span><span class="f3 push-content">: 家要到車牌 然後承辦員警還想搓湯圓 恐嚇</span><span class="push-ipdatetime"> 106.68.248.62 04/09 22:08 </span></div><div class="push"><span class="f1 hl push-tag">→ </span><span class="f3 hl push-userid">noah23</span><span class="f3 push-content">: 我摔車當下走到旁邊也算肇逃 台南警察真</span><span class="push-ipdatetime"> 106.68.248.62 04/09 22:08 </span></div><div class="push"><span class="f1 hl push-tag">→ </span><span class="f3 hl push-userid">noah23</span><span class="f3 push-content">: 的很垃圾</span><span class="push-ipdatetime"> 106.68.248.62 04/09 22:08 </span></div><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl push-userid">spring719</span><span class="f3 push-content">: 高調</span><span class="push-ipdatetime"> 111.82.74.81 04/10 03:24 </span></div><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl push-userid">shownlin</span><span class="f3 push-content">: 高調</span><span class="push-ipdatetime"> 27.53.8.127 04/10 11:58 </span></div><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl push-userid">poasdm</span><span class="f3 push-content">: 高調</span><span class="push-ipdatetime">122.118.206.177 04/10 12:03 </span></div><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl 
push-userid">Isis0311</span><span class="f3 push-content">: 高調</span><span class="push-ipdatetime"> 114.136.76.102 04/10 13:54 </span></div><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl push-userid">nonsu</span><span class="f3 push-content">: 幫高調</span><span class="push-ipdatetime"> 111.82.86.9 04/10 15:55 </span></div><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl push-userid">bingreen</span><span class="f3 push-content">: 幫高</span><span class="push-ipdatetime">111.184.234.172 04/10 19:11 </span></div><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl push-userid">a22028504</span><span class="f3 push-content">: 推</span><span class="push-ipdatetime"> 219.84.253.220 04/10 19:28 </span></div><div class="push"><span class="f1 hl push-tag">噓 </span><span class="f3 hl push-userid">windsp0419</span><span class="f3 push-content">: 怎自己不裝行車記錄器</span><span class="push-ipdatetime"> 36.238.156.135 04/10 20:50 </span></div><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl push-userid">TZUYIC</span><span class="f3 push-content">: 電子書螢幕會有手機好看嗎,不怎麼吸引人</span><span class="push-ipdatetime"> 101.137.181.95 04/10 23:53 </span></div><div class="push"><span class="f1 hl push-tag">→ </span><span class="f3 hl push-userid">TZUYIC</span><span class="f3 push-content">: 推錯,請無視。</span><span class="push-ipdatetime"> 101.137.181.95 04/10 23:53 </span></div><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl push-userid">gl4su06</span><span class="f3 push-content">: 高調</span><span class="push-ipdatetime"> 101.9.97.91 04/11 07:01 </span></div><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl push-userid">DOOT</span><span class="f3 push-content">: 順風本來就是這麼虐 反觀某高中生</span><span class="push-ipdatetime">223.141.210.167 04/11 08:19 </span></div><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl push-userid">ruby54332521</span><span class="f3 
push-content">: 推</span><span class="push-ipdatetime"> 114.43.208.150 04/11 11:31 </span></div><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl push-userid">jal809</span><span class="f3 push-content">: 高調</span><span class="push-ipdatetime"> 114.45.14.89 04/11 17:54 </span></div><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl push-userid">whitefox</span><span class="f3 push-content">: 原來是樹本</span><span class="push-ipdatetime">118.150.192.235 04/11 18:27 </span></div><div class="push"><span class="f1 hl push-tag">→ </span><span class="f3 hl push-userid">kaodio</span><span class="f3 push-content">: 所以說偷拍是不是比較不嚴重的戀童啊</span><span class="push-ipdatetime"> 118.168.199.9 04/11 19:33 </span></div><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl push-userid">abb123456</span><span class="f3 push-content">: 高調</span><span class="push-ipdatetime"> 223.136.80.155 04/12 02:16 </span></div><div class="push"><span class="f1 hl push-tag">→ </span><span class="f3 hl push-userid">luciffar</span><span class="f3 push-content">: 2.8*5你不會算???</span><span class="push-ipdatetime"> 111.71.214.19 04/13 17:29 </span></div><div class="push"><span class="f1 hl push-tag">→ </span><span class="f3 hl push-userid">luciffar</span><span class="f3 push-content">: 抱歉文章被刪了 推文跑來這</span><span class="push-ipdatetime"> 111.71.214.19 04/13 17:31 </span></div><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl push-userid">axixi</span><span class="f3 push-content">: 推</span><span class="push-ipdatetime"> 49.216.128.150 04/13 17:53 </span></div><div class="push"><span class="f1 hl push-tag">→ </span><span class="f3 hl push-userid">aiweisen</span><span class="f3 push-content">: 幫高調</span><span class="push-ipdatetime">223.136.218.155 04/15 15:52 </span></div><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl push-userid">jetalpha</span><span class="f3 push-content">: 幫高調</span><span 
class="push-ipdatetime"> 1.175.88.251 04/15 17:19 </span></div><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl push-userid">cdvfbgnh</span><span class="f3 push-content">: 高調</span><span class="push-ipdatetime"> 180.217.251.26 04/16 09:25 </span></div><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl push-userid">ckyuzi</span><span class="f3 push-content">: 漲價跟電夠不夠有什麼關係</span><span class="push-ipdatetime"> 1.164.156.1 04/16 16:26 </span></div><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl push-userid">s629627439</span><span class="f3 push-content">: 海德格救我~~</span><span class="push-ipdatetime"> 49.216.91.6 04/16 18:25 </span></div><div class="push"><span class="f1 hl push-tag">→ </span><span class="f3 hl push-userid">s629627439</span><span class="f3 push-content">: 沒事,我回錯文</span><span class="push-ipdatetime"> 49.216.91.6 04/16 18:25 </span></div><div class="push"><span class="f1 hl push-tag">→ </span><span class="f3 hl push-userid">dw7931425</span><span class="f3 push-content">: 炒青椒要加什麼才專業?</span><span class="push-ipdatetime"> 114.33.133.253 04/16 20:05 </span></div><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl push-userid">ralfeistein</span><span class="f3 push-content">: 所以核電廠跟溫泉偷拍的關係是???</span><span class="push-ipdatetime"> 111.253.99.194 04/16 20:27 </span></div><div class="push"><span class="hl push-tag">推 </span><span class="f3 hl push-userid">hsulunlee</span><span class="f3 push-content">: 有感!!</span><span class="push-ipdatetime"> 58.114.69.4 04/17 03:01 </span></div><div class="push"><span class="f1 hl push-tag">→ </span><span class="f3 hl push-userid">hsulunlee</span><span class="f3 push-content">: 不好意思回錯了T-T</span><span class="push-ipdatetime"> 58.114.69.4 04/17 03:01 </span></div></div>
# Collect every <span> with the class "article-meta-value" inside the
# main content element.
article_info = main_content.find_all(
    "span",
    attrs={"class": "article-meta-value"},
)
# Display the collected spans (notebook cell output).
article_info
[<span class="article-meta-value">derrick1680 ()</span>, <span class="article-meta-value">Gossiping</span>, <span class="article-meta-value">求助!家人車禍行車紀錄</span>, <span class="article-meta-value">Tue Apr 9 19:38:23 2024</span>]
# Read author, title and time from the metadata spans.
# Indices 0, 2 and 3 are accessed, so at least four spans must be
# present; checking only for a non-empty list would still raise
# IndexError on an article with one to three metadata spans.
if len(article_info) >= 4:
    author = article_info[0].string  # author
    title = article_info[2].string  # title
    time = article_info[3].string  # time
# Fall back to a placeholder when the metadata is missing or incomplete.
else:
    author = "無"  # author
    title = "無"  # title
    time = "無"  # time
# Extract the concatenated text of the main content element and all of
# its descendants.
all_text = main_content.get_text()
# Display the extracted text (notebook cell output).
all_text
'作者derrick1680 ()看板Gossiping標題求助!家人車禍行車紀錄時間Tue Apr 9 19:38:23 2024\n媽媽發生車禍後大量硬腦膜下出血 目前在加護病房中昏迷觀察\n希望在2024/4/9早上9點18分經過台南市北區臨安路跟海安路交叉路口的朋友能夠提供\n相關的影像或行車紀錄器\n謝謝各位大恩大德\n\n地點: 台南市北區臨安路跟海安路交叉口。\n\n時間:2024/4/9 早上 9:16~9:18\n\n若有人或店家有相關影像\n\n請聯絡:\n\n0961122889 許先生\n0987122899 許先生\n\n再麻煩大家幫忙 謝謝各位!\n\n--\n※ 發信站: 批踢踢實業坊(ptt.cc), 來自: 219.71.103.32 (臺灣)\n※ 文章網址: https://www.ptt.cc/bbs/Gossiping/M.1712662710.A.6A7.html\n推 ubcs: 麻煩分類改協尋喔 59.120.192.119 04/09 19:38\n推 tetani: 看地圖 在那邊超多政府監視器 都是壞的? 61.227.188.159 04/09 19:40\n推 home4129: 推一下 180.217.232.78 04/09 19:43\n推 waa006: 高調 223.138.73.235 04/09 19:49\n推 genaro: 推 219.71.35.114 04/09 20:44\n推 noah23: 加油 我以前在善化被計程車逼車摔車 警察 106.68.248.62 04/09 22:08\n→ noah23: 說找不到監視器 我還帶著傷自己去旁邊店 106.68.248.62 04/09 22:08\n→ noah23: 家要到車牌 然後承辦員警還想搓湯圓 恐嚇 106.68.248.62 04/09 22:08\n→ noah23: 我摔車當下走到旁邊也算肇逃 台南警察真 106.68.248.62 04/09 22:08\n→ noah23: 的很垃圾 106.68.248.62 04/09 22:08\n推 spring719: 高調 111.82.74.81 04/10 03:24\n推 shownlin: 高調 27.53.8.127 04/10 11:58\n推 poasdm: 高調122.118.206.177 04/10 12:03\n推 Isis0311: 高調 114.136.76.102 04/10 13:54\n推 nonsu: 幫高調 111.82.86.9 04/10 15:55\n推 bingreen: 幫高111.184.234.172 04/10 19:11\n推 a22028504: 推 219.84.253.220 04/10 19:28\n噓 windsp0419: 怎自己不裝行車記錄器 36.238.156.135 04/10 20:50\n推 TZUYIC: 電子書螢幕會有手機好看嗎,不怎麼吸引人 101.137.181.95 04/10 23:53\n→ TZUYIC: 推錯,請無視。 101.137.181.95 04/10 23:53\n推 gl4su06: 高調 101.9.97.91 04/11 07:01\n推 DOOT: 順風本來就是這麼虐 反觀某高中生223.141.210.167 04/11 08:19\n推 ruby54332521: 推 114.43.208.150 04/11 11:31\n推 jal809: 高調 114.45.14.89 04/11 17:54\n推 whitefox: 原來是樹本118.150.192.235 04/11 18:27\n→ kaodio: 所以說偷拍是不是比較不嚴重的戀童啊 118.168.199.9 04/11 19:33\n推 abb123456: 高調 223.136.80.155 04/12 02:16\n→ luciffar: 2.8*5你不會算??? 
111.71.214.19 04/13 17:29\n→ luciffar: 抱歉文章被刪了 推文跑來這 111.71.214.19 04/13 17:31\n推 axixi: 推 49.216.128.150 04/13 17:53\n→ aiweisen: 幫高調223.136.218.155 04/15 15:52\n推 jetalpha: 幫高調 1.175.88.251 04/15 17:19\n推 cdvfbgnh: 高調 180.217.251.26 04/16 09:25\n推 ckyuzi: 漲價跟電夠不夠有什麼關係 1.164.156.1 04/16 16:26\n推 s629627439: 海德格救我~~ 49.216.91.6 04/16 18:25\n→ s629627439: 沒事,我回錯文 49.216.91.6 04/16 18:25\n→ dw7931425: 炒青椒要加什麼才專業? 114.33.133.253 04/16 20:05\n推 ralfeistein: 所以核電廠跟溫泉偷拍的關係是??? 111.253.99.194 04/16 20:27\n推 hsulunlee: 有感!! 58.114.69.4 04/17 03:01\n→ hsulunlee: 不好意思回錯了T-T 58.114.69.4 04/17 03:01\n'
# Keep only the text that precedes the signature separator.
# In the article text shown in this notebook, the body is followed by
# "--\n※ 發信站: ..." and then the comments. Cutting at that marker,
# instead of at the last bare "--", prevents a comment that itself
# contains "--" from moving the cut point into the comment section.
signature_marker = "--\n※ 發信站"
cut = all_text.rfind(signature_marker)
if cut != -1:
    # Split on "--" so the result stays a list of pieces that can be
    # re-joined with "--" to recover the text before the signature.
    pre_texts = all_text[:cut].split("--")
else:
    # Marker not found: drop everything after the last "--" instead.
    pre_texts = all_text.split("--")[:-1]
# Display the pieces (notebook cell output).
pre_texts
['作者derrick1680 ()看板Gossiping標題求助!家人車禍行車紀錄時間Tue Apr 9 19:38:23 2024\n媽媽發生車禍後大量硬腦膜下出血 目前在加護病房中昏迷觀察\n希望在2024/4/9早上9點18分經過台南市北區臨安路跟海安路交叉路口的朋友能夠提供\n相關的影像或行車紀錄器\n謝謝各位大恩大德\n\n地點: 台南市北區臨安路跟海安路交叉口。\n\n時間:2024/4/9 早上 9:16~9:18\n\n若有人或店家有相關影像\n\n請聯絡:\n\n0961122889 許先生\n0987122899 許先生\n\n再麻煩大家幫忙 謝謝各位!\n\n']
# Merge all of the preceding pieces back into a single string, restoring
# the "--" separators between them.
separator = "--"
one_text = separator.join(pre_texts)
# Display the merged text (notebook cell output).
one_text
'作者derrick1680 ()看板Gossiping標題求助!家人車禍行車紀錄時間Tue Apr 9 19:38:23 2024\n媽媽發生車禍後大量硬腦膜下出血 目前在加護病房中昏迷觀察\n希望在2024/4/9早上9點18分經過台南市北區臨安路跟海安路交叉路口的朋友能夠提供\n相關的影像或行車紀錄器\n謝謝各位大恩大德\n\n地點: 台南市北區臨安路跟海安路交叉口。\n\n時間:2024/4/9 早上 9:16~9:18\n\n若有人或店家有相關影像\n\n請聯絡:\n\n0961122889 許先生\n0987122899 許先生\n\n再麻煩大家幫忙 謝謝各位!\n\n'
# Break the text into rows at each newline; the first row is the header
# and is discarded, the remaining rows are kept as a list.
_header_row, *texts = one_text.split("\n")
# Display the rows (notebook cell output).
texts
['媽媽發生車禍後大量硬腦膜下出血 目前在加護病房中昏迷觀察', '希望在2024/4/9早上9點18分經過台南市北區臨安路跟海安路交叉路口的朋友能夠提供', '相關的影像或行車紀錄器', '謝謝各位大恩大德', '', '地點: 台南市北區臨安路跟海安路交叉口。', '', '時間:2024/4/9 早上 9:16~9:18', '', '若有人或店家有相關影像', '', '請聯絡:', '', '0961122889 許先生', '0987122899 許先生', '', '再麻煩大家幫忙 謝謝各位!', '', '']
# Re-assemble the rows into one string, one row per line.
newline = "\n"
content = newline.join(texts)
# Display the final article content (notebook cell output).
content
'媽媽發生車禍後大量硬腦膜下出血 目前在加護病房中昏迷觀察\n希望在2024/4/9早上9點18分經過台南市北區臨安路跟海安路交叉路口的朋友能夠提供\n相關的影像或行車紀錄器\n謝謝各位大恩大德\n\n地點: 台南市北區臨安路跟海安路交叉口。\n\n時間:2024/4/9 早上 9:16~9:18\n\n若有人或店家有相關影像\n\n請聯絡:\n\n0961122889 許先生\n0987122899 許先生\n\n再麻煩大家幫忙 謝謝各位!\n\n'
Collect the top 100 posts from the PTT Gossiping board and combine all of the information into a pandas DataFrame with the following columns, in this order.
post index | post author | post title | post time | post content | post source IP | comment index | comment type| comment author | comment content | source IP | comment time
Comments under the same post share the same post information.
Submit the following materials.
(1) The IPython notebook, downloaded as an HTML file.
(2) The integrated CSV file, with its columns in the order given above.